MonolayerCultures / src / Integrations / RC17_GFP32 / [RC17][GFP32]Integration.Rmd
[RC17][GFP32]Integration.Rmd
Raw
---
title: "RC17 & GFP32 Integration"
author: "Nina-Lydia Kazakou"
date: "19/03/2022"
output: html_document
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```

# Set-up

### Load libraries

```{r message=FALSE, warning=FALSE}
library(Seurat)
library(ggplot2)
library(ggsci)
library(here)
```

### Colour Palette

```{r load_palette}
mypal <- pal_npg("nrc", alpha = 0.7)(10)
mypal2 <-pal_tron("legacy", alpha = 0.7)(7)
mypal3<-pal_lancet("lanonc", alpha = 0.7)(9)
mypal4<-pal_simpsons(palette = c("springfield"), alpha = 0.7)(16)
mypal5 <- pal_rickandmorty(palette = c("schwifty"), alpha = 0.7)(6)
mypal6 <- pal_futurama(palette = c("planetexpress"), alpha = 0.7)(5)
mypal7 <- pal_startrek(palette = c("uniform"), alpha = 0.7)(5)
mycoloursP<- c(mypal, mypal2, mypal3, mypal4)
```

### Load objects

```{r}
RC17 <- readRDS(here("data", "Processed", "AllCells", "RC17_AllCells_srt.rds"))
```

```{r eval=FALSE}
GFP32 <- readRDS(here("data", "Processed", "Original_Objects", "GFP32_annotated_srt.rds"))
```

#### Polish GFP32 metadata and save object

```{r eval=FALSE}
metadata.filt <- GFP32@meta.data

metadata.filt$Cells <- rownames(metadata.filt)

metadata.filt$Sample <- NA
t1 <- grepl("^preOL", metadata.filt$Cells)
t2 <- grepl("^earlyOL_", metadata.filt$Cells)
t3 <- grepl("^lateOL_", metadata.filt$Cells)

metadata.filt$Sample[which(t1)] <- "wk1"
metadata.filt$Sample[which(t2)] <- "wk2"
metadata.filt$Sample[which(t3)] <- "wk3"

GFP32@meta.data <- metadata.filt

GFP32@meta.data$dataset <- NULL
GFP32@meta.data$Dataset <- "GFP32_Monolayer_AllCells"

GFP32@meta.data$seurat_clusters <- NULL

GFP32[["percent.ribo"]] <- PercentageFeatureSet(GFP32, pattern = "^RP[SL]")

GFP32@meta.data$ClusterID <- GFP32@meta.data$Cluster_id
GFP32@meta.data$Cluster_id <- NULL


current.ids <- c("RADIAL GLIA", "GLIA PROGENITOR", "ATROCYTE 1", "OPC 1", "NEURON 1", "OLIGO 2", "CYCLING CELLS 1", "NEURON 2", "OPC 2", "PRE-OPC", "ASTROCYTE 2", "OLIGO 1", "OLIGO 3", "CYCLING CELLS 2", "PERICYTE")
new.ids <- c("Radial_Glia", "Glial_Progenitors", "Astrocytes_1", "OPCs_1", "Neurons_1", "Oligos_2", "CyC_1", "Neurons_2", "OPCs_2", "pre-OPCs", "Astrocytes_2", "Oligos_1", "Oligos_3", "CyC_2", "Pericytes")
GFP32@meta.data$ClusterID <- plyr::mapvalues(x = GFP32@meta.data$ClusterID, from = current.ids, to = new.ids)

GFP32 <- CellCycleScoring(object = GFP32, g2m.features = cc.genes$g2m.genes,s.features = cc.genes$s.genes)

head(GFP32@meta.data, 2)
```

```{r}
GFP32 <- readRDS(here("data", "Processed", "GFP32", "GFP32_AllCells_srt.rds"))
``` 

```{r eval=FALSE}
dir.create(here("data", "Processed", "GFP32"))
saveRDS(GFP32, here("data", "Processed", "GFP32", "GFP32_AllCells_srt.rds"))
```

```{r Pre-process_metadata}
GFP32@meta.data$GFP32_ClusterID <- GFP32@meta.data$ClusterID

GFP32@meta.data$Dataset_ClusterID <- GFP32@meta.data$ClusterID
old <- c("Radial_Glia", "Glial_Progenitors", "Astrocytes_1", "OPCs_1", "Neurons_1", "Oligos_2", "CyC_1", "Neurons_2", "OPCs_2", "pre-OPCs", "Astrocytes_2", "Oligos_1", "Oligos_3", "CyC_2", "Pericytes")
new <- c("GFP32_Radial_Glia", "GFP32_Glial_Progenitors", "GFP32_Astrocytes_1", "GFP32_OPCs_1", "GFP32_Neurons_1", "GFP32_Oligos_2", "GFP32_CyC_1", "GFP32_Neurons_2", "GFP32_OPCs_2", "GFP32_pre-OPCs", "GFP32_Astrocytes_2", "GFP32_Oligos_1", "GFP32_Oligos_3", "GFP32_CyC_2", "GFP32_Pericytes")
GFP32@meta.data$Dataset_ClusterID <- plyr::mapvalues(x = GFP32@meta.data$Dataset_ClusterID, from = old, to = new)

GFP32@meta.data$ClusterID <- NULL


RC17@meta.data$RC17_ClusterID <- RC17@meta.data$ClusterID

RC17@meta.data$Dataset_ClusterID <- RC17@meta.data$ClusterID
old.id <- c("Radial_Glia_1", "Radial_Glia/Pre-OPCs_1", "Astrocytes_1","Astrocytes_2", "Oligodendroglia_1", "Unknown", "Radial_Glia_2", "Outer-Radial_Glia_1", "Outer-Radial_Glia_2", "Radial_Glia/Pre-OPCs_2", "Neurons_1", "Neurons_2", "Oligodendroglia_2", "Radial_Glia_3", "Oligodendroglia_3", "Neurons_3", "Oligodendroglia_4", "Oligodendroglia_5", "Oligodendroglia_6", "Pericytes", "Outer-Radial_Glia_3")
new.id <-c("RC17_Radial_Glia_1", "RC17_Radial_Glia/Pre-OPCs_1", "RC17_Astrocytes_1","RC17_Astrocytes_2", "RC17_Oligodendroglia_1", "RC17_Unknown", "RC17_Radial_Glia_2", "RC17_Outer-Radial_Glia_1", "RC17_Outer-Radial_Glia_2", "RC17_Radial_Glia/Pre-OPCs_2", "RC17_Neurons_1", "RC17_Neurons_2", "RC17_Oligodendroglia_2", "RC17_Radial_Glia_3", "RC17_Oligodendroglia_3", "RC17_Neurons_3", "RC17_Oligodendroglia_4", "RC17_Oligodendroglia_5", "RC17_Oligodendroglia_6", "RC17_Pericytes", "RC17_Outer-Radial_Glia_3")
RC17@meta.data$Dataset_ClusterID <- plyr::mapvalues(x = RC17@meta.data$Dataset_ClusterID, from = old.id, to = new.id)

RC17@meta.data$ClusterID <- NULL
```


# Integration for RC17 & GFP32

## CCA

```{r CCA}
ol_anchors <- FindIntegrationAnchors(object.list = list(GFP32, RC17), 
                                     dims = 1:20) 

ol_comb <- IntegrateData(anchorset = ol_anchors, dims = 1:20)

DefaultAssay(ol_comb) <- "integrated"
```

# Run the standard workflow for visualization and clustering
```{r Clustering_Integrated_srt}
all_genes <- rownames(ol_comb)
ol_comb <- ScaleData(ol_comb, features = all_genes)
ol_comb <- RunPCA(ol_comb, npcs = 30, verbose = FALSE)
# t-SNE and Clustering
ol_comb <- RunUMAP(ol_comb, reduction = "pca", dims = 1:20)
ol_comb <- FindNeighbors(ol_comb, reduction = "pca", dims = 1:20)
ol_comb <- FindClusters(ol_comb, resolution = 0.5)
```

```{r eval=FALSE}
dir.create(here("data", "Processed", "Integrations", "GFP32_RC17"))
saveRDS(ol_comb, here("data", "Processed", "Integrations", "GFP32_RC17", "GFP32_RC17_AllCells_comb.rds"))
```


# Visualization

```{r}
DimPlot(ol_comb, group.by = "Dataset", pt.size = 0.8, cols = c("hotpink", mycoloursP[17])) 
```

```{r fig.width=8}
DimPlot(ol_comb, group.by = "Dataset", split.by = "Dataset", pt.size = 0.8, cols = c("hotpink", mycoloursP[17])) & NoLegend()
```


```{r}
DimPlot(ol_comb, group.by = "orig.ident", pt.size = 0.8, cols = c(mycoloursP[10:13], mycoloursP[5:7]))
```

```{r fig.height=3, fig.width=12}
DimPlot(ol_comb, group.by = "orig.ident", split.by = "orig.ident", pt.size = 0.8, cols = c(mycoloursP[10:13], mycoloursP[5:7])) & NoLegend()
```

```{r fig.height=3, fig.width=8}
DimPlot(ol_comb, group.by = "orig.ident", split.by = "Sample", pt.size = 0.8, cols = c(mycoloursP[10:13], mycoloursP[5:7])) 
```

```{r fig.height=3, fig.width=8}
DimPlot(ol_comb, group.by = "Dataset", split.by = "Phase", pt.size = 0.8, cols = c("hotpink", mycoloursP[17])) 
```


```{r fig.height=5, fig.width=12}
DimPlot(ol_comb, group.by = "Dataset_ClusterID", split.by = "Dataset", pt.size = 0.8, cols = mycoloursP) 
```

```{r fig.height=10, fig.width=10, message=FALSE, warning=FALSE}
DimPlot(ol_comb, group.by = "RC17_ClusterID", split.by = "Dataset", pt.size = 0.8, cols = mycoloursP, label = TRUE, label.size = 3) & NoLegend()
DimPlot(ol_comb, group.by = "GFP32_ClusterID", split.by = "Dataset", pt.size = 0.8, cols = mycoloursP, label = TRUE, label.size = 3) & NoLegend()
```

```{r}
rm(GFP32, RC17, ol_comb, ol_anchors)
```


## Label Transfer for Oligodendroglia (from RC17 to GFP32)

```{r}
RC17_ol <- readRDS(here("data", "Processed", "Oligodendroglia", "hES-derived_monolayerOL.rds"))

GFP32 <- readRDS(here("data", "Processed", "GFP32", "GFP32_AllCells_srt.rds"))
Idents(GFP32) <- "ClusterID"
GFP32_ol <- subset(GFP32, idents = c("Oligos_1", "Oligos_2", "Oligos_3", "OPCs_1", "OPCs_2", "pre-OPCs"))
```


```{r}
lt_anchors <- FindTransferAnchors(reference = RC17_ol , query = GFP32_ol, 
    dims = 1:30)
predictions <- TransferData(anchorset = lt_anchors, refdata = RC17_ol@meta.data$ClusterID, 
    dims = 1:30)
lt_GFP32_ol <- AddMetaData(GFP32_ol, metadata = predictions)
```

```{r}
DimPlot(lt_GFP32_ol, group.by = "ClusterID" , cols = mycoloursP[10:40], label = TRUE)

DimPlot(lt_GFP32_ol, group.by = "predicted.id", cols = mycoloursP[11:40] )

DimPlot(lt_GFP32_ol, group.by = "predicted.id", cols = mycoloursP[11:40], label = "TRUE", label.size = 3) & NoLegend()
```

```{r fig.height=10, fig.width=8}
DimPlot(lt_GFP32_ol, group.by = "predicted.id", split.by = "predicted.id", cols = mycoloursP[11:40], ncol = 2) &  NoLegend()
```

### Correlation between ol ID and predicted ID

```{r fig.height=8, fig.width=6}
library(corrplot)

met_dat <- lt_GFP32_ol@meta.data
cor_df <- data.frame(orig_id = met_dat$ClusterID,
                     pred_id = met_dat$predicted.id)
cor_df$pred_id <- as.factor(cor_df$pred_id)
cor_df$pred_id <- factor(cor_df$pred_id, levels = c("OPC_1", "pri-OPC", "OPC_2", "MAO", "OAPC", "OLIGO_1", "CyP_1", "Cyp_2", "OLIGO_2", "OPC_3", "SPARCL1+ OLIGO_3"))
cor_matr <- matrix(data= 0, 
                   nrow = length(levels(cor_df$orig_id)), 
                   ncol = length(levels(cor_df$pred_id)))
rownames(cor_matr) <- levels(cor_df$orig_id)
colnames(cor_matr) <- levels(cor_df$pred_id)
for(i in 1: nrow(cor_df)){
  name1 <- as.character(cor_df[i, 1])
  name2 <- as.character(cor_df[i, 2])
  cor_matr[name1, name2] <- cor_matr[name1, name2] + 1
}
max(cor_matr)
centr_cor_matr <- cor_matr/ max(cor_matr)
corrplot(centr_cor_matr)
```


## Label Transfer for Oligodendroglia (from GFP32 to RC17)

```{r}
lt_anchors <- FindTransferAnchors(reference = GFP32_ol , query = RC17_ol, 
    dims = 1:30)
predictions <- TransferData(anchorset = lt_anchors, refdata = GFP32_ol@meta.data$ClusterID, 
    dims = 1:30)
lt_RC17_ol <- AddMetaData(RC17_ol, metadata = predictions)
```


```{r}
DimPlot(lt_RC17_ol, group.by = "ClusterID" , cols = mycoloursP[10:40], label = TRUE)

DimPlot(lt_RC17_ol, group.by = "predicted.id", cols = mycoloursP[11:40] )

DimPlot(lt_RC17_ol, group.by = "predicted.id", cols = mycoloursP[11:40], label = "TRUE", label.size = 3) & NoLegend()
```

```{r fig.height=8, fig.width=8}
DimPlot(lt_RC17_ol, group.by = "predicted.id", split.by = "predicted.id", cols = mycoloursP[11:40], ncol = 2) &  NoLegend()
```

### Correlation between ol ID and predicted ID
```{r fig.height=5, fig.width=6}
met_dat <- lt_RC17_ol@meta.data
cor_df <- data.frame(RC17_ol_id = met_dat$ClusterID, nature_id = met_dat$predicted.id)
cor_df$nature_id <- as.factor(cor_df$nature_id)
cor_df$nature_id <- factor(cor_df$nature_id, levels = c("pre-OPCs", "OPCs_1", "OPCs_2", "Oligos_1", "Oligos_2", "Oligos_3"))
cor_matr <- matrix(data= 0, 
                   nrow = length(levels(cor_df$RC17_ol_id)), 
                   ncol = length(levels(cor_df$nature_id)))
rownames(cor_matr) <- levels(cor_df$RC17_ol_id)
colnames(cor_matr) <- levels(cor_df$nature_id)
for(i in 1: nrow(cor_df)){
  name1 <- as.character(cor_df[i, 1])
  name2 <- as.character(cor_df[i, 2])
  cor_matr[name1, name2] <- cor_matr[name1, name2] + 1
}
max(cor_matr)
centr_cor_matr <- cor_matr/ max(cor_matr)
corrplot(centr_cor_matr)
```

### Correct counts not for largest total counts, but first for cluster size
```{r}
clu_counts <- table(cor_counts = lt_RC17_ol@meta.data$ClusterID)
resiz_mat <- cor_matr
for(i in length(rownames(resiz_mat))){
  resiz_mat[rownames(resiz_mat)[i], ]  <- resiz_mat["pri-OPC", ] / clu_counts[rownames(resiz_mat)[i]]
}
# Now bring the data on a scale of 0-1 to enable corrplots
centr_resiz_mat <- resiz_mat/ max(resiz_mat)
```

```{r fig.height=5, fig.width=6, message=FALSE, warning=FALSE}
library(RColorBrewer)

corrplot(centr_resiz_mat, type = "full",method = "circle", 
  col = rep(rev(brewer.pal(n=8, name="RdYlBu")), 2), 
  cl.lim = c(0,1),
  mar = c(1, 0, 1, 0),
   tl.col = "black") 
```


```{r}
sessionInfo()
```